#########################
### Results for Justice as Checks and Balances
### Human coding results
##########################

### This script uses the human coded sample of 3000 cases 
##  This code creates:
## Tables A3.1 and A3.2 

# Preliminaries --
rm(list=ls())
gc()


### Libraries
###########################
###--------------------------
# Relevant libraries
# Next, we download packages.
pkgs <- c("foreign", "dplyr", "ggplot2", "irr", "reshape", "stargazer")
for (pkg in pkgs) {
  if (! (pkg %in% rownames(installed.packages()))) { install.packages(pkg) }
  lapply(pkg, library, character.only=T )
}

### Read classification matrix
setwd("C:/Users/franc/Dropbox/PhD/Edgar_Tlaxcala/AGN/data/")
# big_matrix2 <- read.csv("classification_matrix.csv")
# #big_matrix3 <- big_matrix2
# # big_matrix2 <- big_matrix2 %>% dplyr::select(X, tema_tierras ,  tema_tax,  tema_elecc,tema_tf,tema_abuso,
# #                                         aut_virr , aut_loc_es , aut_loc_ind , aut_escr , aut_ind_pueb , aut_esp , aut_otro , aut_NS,
# #                                         jer_as , jer_des , jer_hor , jer_ns, resolution,conflict, resolution2,
# #                                         UserID ) %>% dplyr::rename(RowID=X)
# # write.csv(big_matrix2, "classification_matrix_2021.csv")
# # big_matrix2 <- read.csv("C:/Users/franc/Dropbox/PhD/Edgar_Tlaxcala/AGN/data/classification_matrix_2021.csv")
# 
# ### Read indios data and select variables
# # read agn_indios.csv
# setwd("C://Users/franc/Dropbox/PhD/Edgar_Tlaxcala/AGN/")
# agn_indios <- read.csv('agn_indios_2021.csv')
# indios_short <- agn_indios %>% dplyr::select(X, year, decade, name, protect_new2, pop_ratio, per_encomienda) #text
# indios_short <- indios_short %>% dplyr::rename(RowID=X)
# indios_short <- indios_short %>% filter(name!="Valladolid", name!="Guachinango", name!="Guanchinango")
# 
# ## merge
# big_matrix2 <- big_matrix2 %>% left_join(indios_short, by="RowID")
# 
# ## Filter
# big_matrix2 <- big_matrix2 %>% filter(!is.na(RowID))
# length(unique(big_matrix2$RowID))  # about 3,000
# 
# #### Consolidate one variable 
# big_matrix2 <- big_matrix2 %>% mutate(tema_physical=ifelse(tema_tf==1 | tema_abuso==1, 1,0))
# # big_matrix2 <- big_matrix2 %>% dplyr::select(X, tema_tierras ,  tema_tax,  tema_elecc,tema_tf,tema_abuso, tema_physical,
# #                                              aut_virr , aut_loc_es , aut_loc_ind , aut_escr , aut_ind_pueb , aut_esp , aut_otro , aut_NS,
# #                                              jer_as , jer_des , jer_hor , jer_ns, resolution,conflict, resolution2,
# #                                              UserID, year, decade,name, pop_ratio, per_encomienda) %>% dplyr::rename(RowID=X)
# big_matrix2 <- big_matrix2 %>% dplyr::select(-c(X.1))
# write.csv(big_matrix2, "C:/Users/franc/Dropbox/PhD/Edgar_Tlaxcala/AGN/data/classification_matrix_2021.csv")
big_matrix2 <- read.csv("C:/Users/franc/Dropbox/PhD/Edgar_Tlaxcala/AGN/data/classification_matrix_2021.csv")

#######################
##  
##   Human coding analysis (tables A3.1 and A3.2)
##
#######################

#### Main Regressions 
### All sample
model_1 <- lm(resolution ~ tema_tierras + tema_physical + tema_tax+  + tema_elecc+
                aut_virr + aut_loc_es + aut_loc_ind + aut_escr + aut_ind_pueb + aut_esp + aut_otro + aut_NS+
                jer_as + jer_des + jer_hor + 
                UserID + as.factor(decade) + name*decade,
              data= big_matrix2        )
#summary(model_1)
model_1_vcov <- cluster.vcov(model_1 , ~ big_matrix2$name)
model_1_coef <- coeftest(model_1 , model_1_vcov )

#### Only clear conflict
model_2 <- lm(resolution ~ tema_tierras + tema_physical + tema_tax+  tema_elecc+
                aut_virr + aut_loc_es + aut_loc_ind + aut_escr + aut_ind_pueb + aut_esp + aut_otro + aut_NS+
                jer_as + jer_des + jer_hor + jer_ns +
                UserID  + as.factor(decade) + name*decade,
              data= big_matrix2[which(big_matrix2$conflict==1),])
#summary(model_2)
model_2_vcov <- cluster.vcov(model_2 , ~ big_matrix2[which(big_matrix2$conflict==1),]$name)
model_2_coef <- coeftest(model_2 , model_2_vcov )

### Only resolution non ambigous 
model_3 <- lm(resolution ~ tema_tierras + tema_physical + tema_tax+  + tema_elecc+
                aut_virr + aut_loc_es + aut_loc_ind + aut_escr + aut_ind_pueb + aut_esp + aut_otro + aut_NS+
                jer_as + jer_des + jer_hor + jer_ns +
                UserID  + as.factor(decade) + name*decade,
              data= big_matrix2[which(big_matrix2$conflict==1 &  big_matrix2$resolution2!=3),])

#summary(model_3)
model_3_vcov <- cluster.vcov(model_3 , ~ big_matrix2[which(big_matrix2$conflict==1 &  big_matrix2$resolution2!=3),]$name)
model_3_coef <- coeftest(model_3 , model_3_vcov )


### Stargazer for first 3 models
## Table A3.1
#setwd("C://Users/franc/Dropbox/PhD/dissertation/paper_colonialism_liberalims/tex/new_version/")
stargazer(model_1, model_2, model_3, 
          se   = list(model_1_coef[,2],  model_2_coef[,2], model_3_coef[,2]
          ) ,
          p    = list(model_1_coef[,4],  model_2_coef[,4], model_3_coef[,4]
          ),
          dep.var.labels= c("Favorable Ruling"),
          covariate.labels =c("Land Conflicts","Mistreatment",  "Taxes", 
                              "Community"
          ),
          column.labels = c("All sample", "Clear Conflict Only", "Clear Conflict and Clear Resolution Only"),
          keep = c("tema_tierras", "tema_physical", "tema_tax", "tema_elecc"), 
          keep.stat = c("n", "adj.rsq"),
          add.lines=list(c("Province FE", "Y", "Y", "Y"),
                         c("Time FE", "Y", "Y", "Y"),
                         c("Province time trends", "Y", "Y", "Y"),
                         c("Coder FE", "Y", "Y", "Y"),
                         c("Producer Controls", "Y", "Y", "Y"),
                         c("Hierarchy Controls", "Y", "Y", "Y")
                         
          ),
          label="clasif_1",
          title =("Favorable Ruling and Topics (Human Coding)") 
          #out="model_quali1.tex"
          )

#----------------------------------------------
#### Testing balance of powers (Table A3.2)
#### Pop decrease
model_4 <- lm(resolution ~ tema_tierras + tema_physical + tema_tax+  + tema_elecc+
               aut_virr + aut_loc_es + aut_loc_ind + aut_escr + aut_ind_pueb + aut_esp + aut_otro + aut_NS+
               jer_as + jer_des + jer_hor + jer_ns +
               UserID  +as.factor(decade) + name*decade,
             data= big_matrix2[which(big_matrix2$conflict==1 &  big_matrix2$resolution2!=3 
                                     & big_matrix2$pop_ratio<1 ),])

#summary(model_4)
model_4_vcov <- cluster.vcov(model_4 , ~ big_matrix2[which(big_matrix2$conflict==1 &  big_matrix2$resolution2!=3 
                                                           & big_matrix2$pop_ratio<1 ),]$name)
model_4_coef <- coeftest(model_4 , model_4_vcov )

#### Pop increase
model_5  <- lm(resolution ~ tema_tierras + tema_physical + tema_tax+  + tema_elecc+
                 aut_virr + aut_loc_es + aut_loc_ind + aut_escr + aut_ind_pueb + aut_esp + aut_otro + aut_NS+
                 jer_as + jer_des + jer_hor + jer_ns +
                 UserID+   as.factor(decade) + name*decade,
               data= big_matrix2[which(big_matrix2$conflict==1 &  big_matrix2$resolution2!=3 
                                       & big_matrix2$pop_ratio>=1 ),])

#summary(model_5)
model_5_vcov <- cluster.vcov(model_5 , ~ big_matrix2[which(big_matrix2$conflict==1 &  big_matrix2$resolution2!=3 
                                                           & big_matrix2$pop_ratio>=1 ),]$name)
model_5_coef <- coeftest(model_5 , model_5_vcov )

##### LE Strong
enc_mean <- mean(big_matrix2$per_encomienda, na.rm=TRUE)
model_6<- lm(resolution ~ tema_tierras + tema_physical + tema_tax+  + tema_elecc+
               aut_virr + aut_loc_es + aut_loc_ind + aut_escr + aut_ind_pueb + aut_esp + aut_otro + aut_NS+
               jer_as + jer_des + jer_hor + jer_ns +
               UserID  +as.factor(decade) + name*decade,
             data= big_matrix2[which(big_matrix2$conflict==1 &  big_matrix2$resolution2!=3 &
                                       big_matrix2$per_encomienda>=enc_mean),])


model_6_vcov <- cluster.vcov(model_6 , ~ big_matrix2[which(big_matrix2$conflict==1 &  big_matrix2$resolution2!=3 &
                                                              big_matrix2$per_encomienda>=enc_mean),]$name)
model_6_coef <- coeftest(model_6 , model_6_vcov )

##### LE weak
model_7<- lm(resolution ~ tema_tierras + tema_physical + tema_tax+  + tema_elecc+
               aut_virr + aut_loc_es + aut_loc_ind + aut_escr + aut_ind_pueb + aut_esp + aut_otro + aut_NS+
               jer_as + jer_des + jer_hor + jer_ns +
               UserID  +as.factor(decade) + name*decade,
             data= big_matrix2[which(big_matrix2$conflict==1 &  big_matrix2$resolution2!=3 &
                                       big_matrix2$per_encomienda<enc_mean ),])

#summary(model_7)
model_7_vcov <- cluster.vcov(model_7 , ~ big_matrix2[which(big_matrix2$conflict==1 &  big_matrix2$resolution2!=3 &
                                                             big_matrix2$per_encomienda<enc_mean ),]$name)
model_7_coef <- coeftest(model_7 , model_7_vcov )


###### Balance of powers
##### Pop decreasing
### LE strong
model_8<- lm(resolution ~ tema_tierras + tema_physical + tema_tax+  + tema_elecc+
               aut_virr + aut_loc_es + aut_loc_ind + aut_escr + aut_ind_pueb + aut_esp + aut_otro + aut_NS+
               jer_as + jer_des + jer_hor + jer_ns +
               UserID  +as.factor(decade) + name*decade,
             data= big_matrix2[which(big_matrix2$conflict==1  &  big_matrix2$resolution2!=3 &
                                       big_matrix2$per_encomienda>=enc_mean & big_matrix2$pop_ratio<1),])

#summary(model_8)
model_8_vcov <- cluster.vcov(model_8 , ~ big_matrix2[which(big_matrix2$conflict==1  &  big_matrix2$resolution2!=3 &
                                                             big_matrix2$per_encomienda>=enc_mean & big_matrix2$pop_ratio<1),]$name)
model_8_coef <- coeftest(model_8 , model_8_vcov )

### LE weak
model_9<- lm(resolution ~ tema_tierras + tema_physical + tema_tax+  + tema_elecc+
               aut_virr + aut_loc_es + aut_loc_ind + aut_escr + aut_ind_pueb + aut_esp + aut_otro + aut_NS+
               jer_as + jer_des + jer_hor + jer_ns +
               UserID  +as.factor(decade) + name*decade,
             data= big_matrix2[which(big_matrix2$conflict==1 &  big_matrix2$resolution2!=3 &
                                       big_matrix2$per_encomienda<enc_mean & big_matrix2$pop_ratio<1),])

#summary(model_9)
model_9_vcov <- cluster.vcov(model_9 , ~ big_matrix2[which(big_matrix2$conflict==1 &  big_matrix2$resolution2!=3 &
                                                             big_matrix2$per_encomienda<enc_mean & big_matrix2$pop_ratio<1),]$name)
model_9_coef <- coeftest(model_9 , model_9_vcov )

#### Units 
units_pop_dec <- length(unique(big_matrix2[which(big_matrix2$conflict==1 &  big_matrix2$resolution2!=3 
                                                 & big_matrix2$pop_ratio<1 ),]$name))

units_pop_inc <- length(unique(big_matrix2[which(big_matrix2$conflict==1 &  big_matrix2$resolution2!=3 
                                                & big_matrix2$pop_ratio>=1 ),]$name))

units_le_s <- length(unique(big_matrix2[which(big_matrix2$conflict==1 &  big_matrix2$resolution2!=3 &
                               big_matrix2$per_encomienda>=enc_mean),]$name))

units_le_w <- length(unique(big_matrix2[which(big_matrix2$conflict==1 &  big_matrix2$resolution2!=3 &
                                                big_matrix2$per_encomienda<enc_mean),]$name))

units_pop_dec_le_s <- length(unique(big_matrix2[which(big_matrix2$conflict==1  &  big_matrix2$resolution2!=3 &
                                                        big_matrix2$per_encomienda>=enc_mean & big_matrix2$pop_ratio<1),]$name))

units_pop_dec_le_w <- length(unique(big_matrix2[which(big_matrix2$conflict==1 &  big_matrix2$resolution2!=3 &
                                                        big_matrix2$per_encomienda<enc_mean & big_matrix2$pop_ratio<1),]$name))

unit_line <- c("Units", units_pop_dec,units_pop_inc , units_le_s, units_le_w, units_pop_dec_le_s, units_pop_dec_le_w)

########
######## Table A3.2 
stargazer(model_4, model_5, model_6, model_7, model_8, model_9,
          se   = list(model_4_coef[,2],  model_5_coef[,2], model_6_coef[,2], model_7_coef[,2],  model_8_coef[,2], model_9_coef[,2]
          ) ,
          p    = list(model_4_coef[,4],  model_5_coef[,4], model_6_coef[,4], model_7_coef[,4],  model_8_coef[,4], model_9_coef[,4]
          ),
          dep.var.labels= c("Favorable Ruling"),
          covariate.labels =c("Land Conflicts","Mistreatment",  "Taxes", 
                              "Community"
          ),
          column.labels = c( "Population", "Local Elites", "Population Decrease"
          ),
          column.separate = c(2,2,2),
          keep = c("tema_tierras", "tema_physical", "tema_tax",  "tema_elecc"), 
          keep.stat = c("n", "adj.rsq"),
          add.lines=list(c("Province FE", "Y", "Y", "Y", "Y", "Y", "Y"),
                         c("Time LT", "Y", "Y", "Y", "Y", "Y", "Y"),
                         c("Province time trends", "Y", "Y", "Y", "Y", "Y", "Y"),
                         c("Coder FE", "Y", "Y", "Y", "Y", "Y", "Y"),
                         c("Producer Controls", "Y", "Y", "Y", "Y", "Y", "Y"),
                         c("Hierarchy Controls", "Y", "Y", "Y", "Y", "Y", "Y"), 
                         unit_line
                         
          ),
          label="clasif_2",
          title =("Favorable Ruling, Balance of Powers, and Topics (Human Coding)") 
          #out="model_quali1.tex"
          )



#######################
##  
##   Inter-coder reliability measures (Suplementary materials S4, Table S4.1)
##
#######################

## transform NAs and keep variables to correlat
na_s <- apply(big_matrix2,2,function(x) sum(is.na(x)) )

### Clean NAs
big_matrix3 <- big_matrix2 %>% mutate(conflict2=ifelse(is.na(conflict), 99, conflict),
                                      negative3=ifelse(is.na(negative2), 99, negative),
                                      resolution3=ifelse(is.na(resolution2), 99, resolution2)
                                      ##
                                      #aut_virr = ifelse(aut_virr==0,2, aut_virr ), 
                                      
) %>% dplyr::select(-c(conflict, negative, negative2, resolution, resolution2, year,decade, name,          
                       protect_new2, pop_ratio, per_encomienda, tema_physical))

## Transform to have obs as rows and users as columns
agreement_two    <- c()
agreement_three   <- c()
kappa        <- c()
kripp        <- c()

## Dataset with agreement 
big_matrix_4 <- matrix(nrow=2998, ncol =ncol(big_matrix3[,1:ncol(big_matrix3)])  )

### For loop ---------
for(c in 5:ncol(big_matrix3)){
  big_matrix_wide <- big_matrix3 %>% dplyr::select(UserID, RowID, names(big_matrix3)[c]) 
  #big_matrix_wide <- t(big_matrix_wide )
  big_matrix_wide <- melt(big_matrix_wide, id=c("UserID","RowID"))
  big_matrix_wide <- cast(big_matrix_wide, RowID ~ UserID + variable, mean )
  big_matrix_wide[is.na(big_matrix_wide)] <- NA
  big_matrix_wide <- big_matrix_wide[,2:ncol(big_matrix_wide)]
  
  ## Vectors for raters
  rater1 <- rep(NA, nrow(big_matrix_wide))
  rater2 <- rep(NA, nrow(big_matrix_wide))
  rater3 <- rep(NA, nrow(big_matrix_wide))
  
  ## For each variable, loop over responses and code each rate's response
  for(i in 1:nrow(big_matrix_wide )){
    this_r <- big_matrix_wide[i,]
    this_r <- this_r[!is.na(this_r)]
    rater1[i] <- this_r[1]; rater2[i] <- this_r[2]; rater3[i] <- this_r[3]
  }
  
  ## Put together by columns (nrows 2998)
  variable_irr <- cbind.data.frame(rater1,rater2, rater3)
  
  ### Calculate Kappa measure of agreement
  this_kappa <- kappam.fleiss(variable_irr)
  kappa[c-4] <- this_kappa$value ## ad to vector
  
  ## kripp Alpha
  # Transpose
  variable_irr_t <- t(variable_irr)
  kripp[c-4] <- kripp.alpha(variable_irr_t)$value
  
  ### Agreement (at least two )
  na_criteria <- apply(variable_irr, 1 , function(x) sum(is.na(x)))
  tem_agreement <- rep(F,nrow(variable_irr) )
  tem_agreement_three <- rep(F,nrow(variable_irr) )
  
  for(i in 1:nrow(variable_irr)){
    ### Cases with no missing
    if(na_criteria[i]==0){
      tem_agreement[i] <- (((variable_irr[i,1] == variable_irr[i,2]) &  (variable_irr[i,1]== variable_irr[i,3]))  |
                             ((variable_irr[i,1] == variable_irr[i,3])  | (variable_irr[i,2] == variable_irr[i,3]) | 
                                (variable_irr[i,1] == variable_irr[i,2])) )
      
      tem_agreement_three[i] <- (((variable_irr[i,1] == variable_irr[i,2]) &  (variable_irr[i,1]== variable_irr[i,3])) )
      
    }
    ### Cases with one missing
    if(na_criteria[i]==1){
      if(is.na(variable_irr[i,1])  ){
        tem_agreement[i] <- (variable_irr[i,2] == variable_irr[i,3]) 
        
      } 
      if(is.na(variable_irr[i,2])  ){
        tem_agreement[i] <- (variable_irr[i,1] == variable_irr[i,3]) 
        
      } 
      if(is.na(variable_irr[i,3])  ){
        tem_agreement[i] <- (variable_irr[i,1] == variable_irr[i,2]) 
        
      } 
      
      tem_agreement_three[i] <- NA
    }
    ### Cases with 2 or 3 missing (in this case is NA)
    if(na_criteria[i]==2 | na_criteria[i]==3){
      tem_agreement[i] <- NA
      tem_agreement_three[i] <- NA
    }
  }
  
  ### Check means of agreement 
  agreement_two[c-4]   <- mean(tem_agreement, na.rm=T )
  agreement_three[c-4] <- mean(tem_agreement_three, na.rm=T )
  
  ### Add agreement variable
  agreed_var <- rep(NA, length(tem_agreement))
  
  for(v in 1:length(tem_agreement)){
    ### Cases no missing
    if(na_criteria[v]==0){
      ## If all three agree
      if(((variable_irr[v,1] == variable_irr[v,2]) &  (variable_irr[v,1]== variable_irr[v,3]))){
        agreed_var[v] <- variable_irr[v,1]
      }
      ## If two agree( 1 and 3)
      if((variable_irr[v,1] == variable_irr[v,3])){
        agreed_var[v] <- variable_irr[v,1]
      }
      ## If two agree (2 and 3)
      if((variable_irr[v,2] == variable_irr[v,3])){
        agreed_var[v] <- variable_irr[v,2]
      }
      ### If two agree (1 and 2)
      if((variable_irr[v,2] == variable_irr[v,3])){
        agreed_var[v] <- variable_irr[v,2]
      }
    } 
    ## Cases 1 missing
    if(na_criteria[v]==1){
      ## If two agree( 1 and 3)
      if((variable_irr[v,1] == variable_irr[v,3]) & !is.na(variable_irr[v,1]) & !is.na(variable_irr[v,3])){
        agreed_var[v] <- variable_irr[v,1]
      }
      ## If two agree (2 and 3)
      if((variable_irr[v,2] == variable_irr[v,3]) & !is.na(variable_irr[v,2]) & !is.na(variable_irr[v,3])   ){
        agreed_var[v] <- variable_irr[v,2]
      }
      ### If two agree (1 and 2)
      if((variable_irr[v,1] == variable_irr[v,2]) & !is.na(variable_irr[v,1]) & !is.na(variable_irr[v,2]) ){
        agreed_var[v] <- variable_irr[v,2]
      }
    }
  }
  
  ### add column to new matrix for agreed values 
  big_matrix_4[,c-4] <- agreed_var 
  
}

## Means 
mean(agreement_two, na.rm = T)
mean(agreement_three, na.rm = T)
mean(kappa, na.rm = T)
mean(kripp, na.rm = T)

# Name vectors and create table for appendix
names(agreement_two) <- colnames(big_matrix3[,c(5:ncol(big_matrix3))] )
names(agreement_three) <- colnames(big_matrix3[,5:ncol(big_matrix3)])
names(kappa) <- colnames(big_matrix3[,5:ncol(big_matrix3)])
names(kripp) <- colnames(big_matrix3[,5:ncol(big_matrix3)])


#### Values for TABLE A3 in Online Appendix
round(kappa,4)



